home *** CD-ROM | disk | FTP | other *** search
-
- #ifndef _IX_INCLUDED_
- #define _IX_INCLUDED_
-
#import <stdio.h>
#import <sys/types.h>
#import <sys/stat.h>
#import <mach.h>
#import <streams/streams.h>
#import "btree/BTreeFile.h"
-
// Compile-time limits of the indexing library.
// NOTE(review): how the key-length limits are enforced (and whether they
// count a terminating NUL) is not visible in this header -- confirm in the
// implementation.
#define ixMaxKeyLength 255
#define ixMinKeyLength 2
#define ixMaxRegExps 512	// capacity of ixRegexTable.regexTable
-
// enumeration of error codes reported
typedef enum {
	ixErrorNoError = 0,	// no error: the normal state of affairs
	ixErrorNoMemory = 1,	// sufficient memory is not available
	ixErrorInternal = 2,	// an internal inconsistency was found
	ixErrorSystem = 100	// base value added to system variable errno
} ixErrorType;
-
- typedef struct {
- unsigned short regexCount;
- struct regex *regexTable[ixMaxRegExps];
- } ixRegexTable;
-
- typedef struct {
- float threshold;
- float percentage;
- unsigned char absolute;
- unsigned char peculiar;
- unsigned char discontract;
- unsigned char foldToLower;
- unsigned char foldPlurals;
- unsigned long minCount;
- unsigned long maxWords;
- unsigned long windowShift;
- unsigned long windowSize;
- WFTable *wordDomain;
- } ixHistRecord;
-
- typedef struct {
- void *userRecord;
- unsigned char writeOffsets;
- unsigned char writeUnknown;
- unsigned char dispunctuate;
- void (*controlWord)(char *, void *);
- vm_offset_t (*readOffset)(void *);
- void (*writeOffset)(vm_offset_t, void *);
- int (*charReader)(void *);
- int (*charWriter)(int, void *);
- int (*charReturn)(int, void *);
- } ixRTFRecord;
-
// A FileCell describes one indexed object.  The struct embeds a complete
// 'struct stat', so <sys/stat.h> must be in scope before this definition.
typedef struct FileCell {
	char *file;		// name of file referenced
	char *body;		// pointer to out-of-line data
	char *desc;		// a short human readable description
	char *type;		// the type of the object, if known
	char *regex;		// a regular expression for object
	char *message;		// a message to be sent for object
	char *data;		// a copy of the image of the FileCell
	struct FileCell *next;	// for creating a linked list
	unsigned long id;	// unique key and id number
	unsigned long pointer;	// a pointer to another FileCell
	unsigned long size;	// the size of the FileCell
	unsigned long date;	// the date the FileCell was written
	unsigned long mode;	// access modes at the time of writing
	unsigned long offset;	// offset of referenced object
	unsigned long length;	// length of referenced object
	unsigned long line;	// line no. of referenced object
	unsigned long bodysize;	// amount of data stored at 'body'
	struct stat statbuf;	// for saving the results of a 'stat'
	char status;		// the status of the FileCell
} FileCell;
-
-
- // A Reference is a weighted pointer to a FileCell
- typedef struct Reference {
- float weight;
- FileCell *f;
- } Reference;
-
- // A RefList is a list of FileCells, as returned by ixGetRefList
- typedef struct RefList {
- unsigned long n;
- Reference *r;
- } RefList;
-
// A Registration associates a FileCell and indexing date with a file name
typedef struct Registration {
	unsigned long id;	// id of the registered FileCell
	unsigned long date;	// indexing date
} Registration;
-
- // An Index is a descriptor providing access to an index.
- typedef struct Index {
- char *name; // the name of the index file
- char *folder; // the name of the folder
- int flag; // a flag word for the index
- BTreeCursor *registryCursor;
- BTreeCursor *keywordsCursor;
- BTreeCursor *fileCellCursor;
- BTreeFile *btreeFile; // a handle to the index file
- } Index;
-
-
// flag bits used in the flag word of the index descriptor (Index.flag)
#define ixFlagNewIndex (1 << 0)	// new index under construction
#define ixFlagCreating (1 << 1)	// during file creation process
-
// macro for closing an array of Index descriptors returned by ixOpenArray
//
// Calls ixClose() on each element up to (not including) the terminating null
// entry.  'ix' must be a modifiable lvalue: it is evaluated repeatedly and is
// left pointing at the terminator, so keep a separate copy of the array base
// if it is still needed afterwards.
#define ixCloseArray(ix) while (*(ix)) ixClose(*(ix)++)
-
// Sort keys accepted by ixSortRefList() and the ixPrint*Query() routines.
typedef enum {
	ixSortByDate = 0,
	ixSortByWeight = 1,
	ixSortByDescription = 2,
	ixSortByType = 3,
	ixSortByName = 4
} ixSortMethod;
-
// Sort order selector; note that reverse is 0 and forward is 1.
typedef enum {
	ixSortReverse = 0,
	ixSortForward = 1
} ixSortDirection;
-
// How a query string is compared with keys (passed as 'searchBy').
typedef enum {
	ixSearchByPrefix = 0,
	ixSearchByFullWord = 1,
	ixSearchAnywhere = 2
} ixSearchMethod;
-
// What a query is matched against (passed as 'matchAgainst').
typedef enum {
	ixMatchFileName = 0,
	ixMatchContent = 1,
	ixMatchFileNameOrContent = 2
} ixMatchAgainst;
-
// How the query string itself is interpreted (passed as 'querySemantic').
typedef enum {
	ixLiteralString = 0,
	ixRegularExpression = 1
} ixQuerySemantic;
-
- extern ixErrorType ixErrorNo;
- extern int ixErrors;
- extern int ixError(ixErrorType errno, char *fmt, ...);
-
// flags used to specify indexing characteristics
extern char *ixFileExtension;	// file extension for index files
extern char *ixStopWord;	// name of stop word list file
extern char *ixStopWordDefault;	// appended to /usr/lib/indexing/files
extern int ixIndexFileNames;	// if true, put file names in index

// indexing characteristics examined at query time
extern int ixFoldToLower;	// if true, fold keys to lower case
extern int ixFoldPlurals;	// if true, plural forms are folded

// flags used to specify querying characteristics
extern int ixNoCaseQuery;	// if true, query is case insensitive
extern int ixFindPlurals;	// if true, check plural and singular
-
- // regular expression tables used by indexing and searching
- extern ixRegexTable _ixBadNameTable;
- extern ixRegexTable _ixGoodNameTable;
-
- // routines for working with regular expression tables
- inline static int
- _ixCheckRegexTable(char *itemString, ixRegexTable *regexTable)
- {
- int i;
-
- for (i = 0; i < regexTable->regexCount; ++i)
- if (re_match_exactly(itemString, regexTable->regexTable[i]))
- return 1;
-
- return 0;
- }
-
- inline static int
- _ixIsGoodName(char *fileName)
- {
- return _ixGoodNameTable.regexCount ?
- _ixCheckRegexTable(fileName, &_ixGoodNameTable) :
- ! _ixCheckRegexTable(fileName, &_ixBadNameTable);
- }
-
- extern int _ixItemToRegexTable(char *itemString,
- ixRegexTable *regexTable, int shellSyntax);
- extern int _ixFileToRegexTable(char *fileName,
- ixRegexTable *regexTable, int shellSyntax);
- extern int _ixPathToRegexTable(char *parentName, char *fileName,
- ixRegexTable *regexTable, int shellSyntax);
-
- // routines for opening, closing, flushing and removing indices
- extern int _ixUnlink(char *name);
- extern int ixUnlink(char *name);
- extern Index *ixOpen(char *name, char *mode);
- extern Index **ixOpenArray(char **path, char *mode);
- extern Index *ixOpenParent(char *file);
- extern Index *ixCleanIndex(Index *ix, int verbose);
- extern void ixFlush(Index *ix);
- extern void ixClose(Index *ix);
-
- // routines for managing FileCells
- extern void _ixLoadFileCell(unsigned long id, FileCell *fileCell);
- extern int _ixMergeFileCell(FileCell *fileCell, FileCell *tempFileCell);
- extern FileCell *_ixReadFileCell(Index *ix, unsigned long id);
- extern FileCell *_ixReadFirstFileCell(Index *ix);
- extern FileCell *_ixReadNextFileCell(Index *ix);
- extern int _ixWriteFileCell(Index *ix, FileCell *fileCell);
- extern FileCell *ixCopyFileCell(FileCell *fileCell, char *file);
- extern FileCell *ixFreeFileCell(FileCell *fileCell);
- extern FileCell *ixInitFileCell(char *file, char *type, char *desc,
- char *message, char *regex, struct stat *b);
- extern FileCell *ixLoadFileCell(Index *ix, FileCell *fileCell);
- extern FileCell *ixMergeFileCell(FileCell *fileCell, FileCell *tempFileCell);
- extern FileCell *ixNewFileCell();
- extern FileCell *ixReadFileCell(Index *ix, unsigned long id);
- extern FileCell *ixReadFirstFileCell(Index *ix);
- extern FileCell *ixReadNextFileCell(Index *ix);
- extern int ixRemoveFileCell(Index *ix, FileCell *fileCell, int remove);
- extern int ixRemoveFileCellById(Index *ix, unsigned long id, int remove);
- extern int ixRemoveFileCellByName(Index *ix, char *name, int remove);
- extern int ixWriteFileCell(Index *ix, FileCell *fileCell);
- extern int ixWriteStatus(Index *ix, FileCell *fileCell, char status);
-
- // routines for managing FileCell registration
- extern Registration *ixGetRegistration(Index *ix, char *name);
- extern unsigned long ixGetRegistrationId(Index *ix, char *name);
- extern FileCell *ixGetRegistrationFileCell(Index *ix, char *name);
- extern unsigned long ixGetRegistrationDate(Index *ix, char *name);
- extern int ixPutRegistration(Index *ix, FileCell *f);
- extern int ixClearRegistrationByName(Index *ix, char *name);
- extern int ixClearRegistration(Index *ix, FileCell *f);
-
- // routines for printing the contents of FileCells and indices
- extern void ixPrintFileCell(FileCell *f, float weight);
- extern void ixPrintFileCellById(Index *ix, unsigned long id, int pointerFlag);
- extern void ixPrintIndexQuery(Index *ix, char *queryString,
- ixSearchMethod searchBy, ixMatchAgainst matchAgainst,
- ixQuerySemantic querySemantic,
- ixSortMethod sortBy, ixSortDirection sortDirection);
- extern void ixPrintSearchQuery(char *applyFolder, char *queryString,
- ixSearchMethod searchBy, ixMatchAgainst matchAgainst,
- ixQuerySemantic querySemantic,
- ixSortMethod sortBy, ixSortDirection sortDirection);
- extern void ixPrintRefList(Index *ix, char *s, ixSearchMethod searchBy,
- ixSortMethod sortBy, ixSortDirection sortDirection);
-
- // routines for managing the string matching tables
- typedef NXHashTable *ixMatchTable;
-
- extern ixMatchTable _ixStopWordTable;
- extern ixMatchTable _ixBadTypeTable;
- extern ixMatchTable _ixGoodTypeTable;
- extern ixMatchTable _ixTextTypeTable;
-
- inline static int
- _ixItemToMatchTable(char *itemString, ixMatchTable *matchTable)
- {
- if (! *matchTable &&
- ! (*matchTable = NXCreateHashTable(NXStrPrototype, 64, 0)))
- return 0;
-
- return ! NXHashInsert(*matchTable, strsave(itemString));
- }
-
- inline static int
- _ixCheckMatchTable(char *itemString, ixMatchTable *matchTable)
- {
- return *matchTable ? NXHashMember(*matchTable, itemString) : 0;
- }
-
- inline static int
- _ixIsGoodType(char *fileType)
- {
- return _ixGoodTypeTable ? NXHashMember(_ixGoodTypeTable, fileType) :
- (!_ixBadTypeTable || !NXHashMember(_ixBadTypeTable, fileType));
- }
-
- extern int _ixFileToMatchTable(char *fileName, ixMatchTable *matchTable);
- extern int _ixPathToMatchTable(char *parentName,
- char *fileName, ixMatchTable *matchTable);
- extern void ixLoadStopWordTable(void);
-
// Convenience wrappers that apply the generic match table routines to the
// stop word table.

// Load stop words from fileName via _ixFileToMatchTable().
#define _ixLoadStopWordTable(fileName) \
	_ixFileToMatchTable(fileName, &_ixStopWordTable)

// Add a single stop word via _ixItemToMatchTable().
#define _ixStopWordEnter(stopWord) \
	_ixItemToMatchTable(stopWord, &_ixStopWordTable)
-
- inline static const char
- *_ixStopWordCheck(const char *stopWord)
- {
- return (const char *) (_ixStopWordTable ?
- NXHashGet(_ixStopWordTable, stopWord) : 0);
- }
-
- inline static void
- _ixClearStopWordTable(void)
- {
- if (_ixStopWordTable)
- NXResetHashTable(_ixStopWordTable);
- }
-
- // routines for testing and creating keyword/FileCell associations
- extern int _ixAssociate(Index *ix, char *k, unsigned long id, float w);
- extern unsigned long ixAssociate(Index *ix, char *k,
- unsigned long id, float w);
- extern int ixAssociated(Index *ix, char *s, unsigned long id);
- extern unsigned long ixAddFileCell(Index *ix, NXStream *keys, FileCell *f);
-
- // routines for getting, clearing and sorting RefLists
- extern int ixOrRefList(RefList *sourceList, RefList *targetList);
- extern int ixAndRefList(RefList *sourceList, RefList *targetList);
- extern int ixAndNotRefList(RefList *sourceList, RefList *targetList);
- extern void ixSortRefList(RefList *refList,
- ixSortMethod sortBy, ixSortDirection sortDirection);
- extern RefList *ixClearRefList(RefList *refList);
- extern RefList *ixGetRefList(Index *ix, char *queryString,
- RefList *refList, ixSearchMethod searchBy, int (*interrupt)());
- extern RefList ixIndexQuery(Index *ix, char *queryString,
- ixSearchMethod searchBy, ixMatchAgainst matchAgainst,
- ixQuerySemantic querySemantic, int (*interrupt)());
- extern RefList ixSearchQuery(char *applyFolder, char *queryString,
- ixSearchMethod searchBy, ixMatchAgainst matchAgainst,
- ixQuerySemantic querySemantic, int (*interrupt)());
-
- // routines for managing file names and paths
- extern char *ixParentFolder(Index *ix);
- extern char *_ixFromParentFolder(Index *ix, char *file);
- extern char *ixFullPath(Index *ix, char *file);
-
- // routines for filtering rich text and generating histograms
- extern int ixHistogram(NXStream *inputStream,
- NXStream *outputStream, ixHistRecord *histRecord);
- extern void _ixReadRichText(ixRTFRecord *rtfRecord);
- extern void ixReadRichText(NXStream *inputStream,
- NXStream *outputStream, unsigned long outputLimit);
-
-
- #endif
-
-